This notebook contains research on using k-nearest neighbours (KNN) to predict graft survival for the Kidney Graft Genetics Project.
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
-- Attaching packages ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.1 --
√ ggplot2 3.3.5 √ purrr 0.3.4
√ tibble 3.1.6 √ dplyr 1.0.8
√ tidyr 1.2.0 √ stringr 1.4.0
√ readr 2.1.2 √ forcats 0.5.1
-- Conflicts ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
x ggplot2::Position() masks BiocGenerics::Position(), base::Position()
library(tuneR)
Attaching package: ‘tuneR’
The following object is masked from ‘package:Biobase’:
channel
The following object is masked from ‘package:BiocGenerics’:
normalize
library(devtools)
Loading required package: usethis
library(ggplot2)
library(tsfeatures)
Registered S3 method overwritten by 'quantmod':
method from
as.zoo.data.frame zoo
library(class)
library(cvTools)
Loading required package: lattice
Loading required package: robustbase
Attaching package: ‘robustbase’
The following object is masked from ‘package:Biobase’:
rowMedians
library(randomForest)
randomForest 4.7-1
Type rfNews() to see new features/changes/bug fixes.
Attaching package: ‘randomForest’
The following object is masked from ‘package:dplyr’:
combine
The following object is masked from ‘package:ggplot2’:
margin
The following object is masked from ‘package:Biobase’:
combine
The following object is masked from ‘package:BiocGenerics’:
combine
library(GEOquery)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Setting options('download.file.method.GEOquery'='auto')
Setting options('GEOquery.inmemory.gpl'=FALSE)
library(R.utils)
Warning: package ‘R.utils’ was built under R version 4.1.3
Loading required package: R.oo
Loading required package: R.methodsS3
R.methodsS3 v1.8.1 (2020-08-26 16:20:06 UTC) successfully loaded. See ?R.methodsS3 for help.
R.oo v1.24.0 (2020-08-26 16:11:58 UTC) successfully loaded. See ?R.oo for help.
Attaching package: ‘R.oo’
The following object is masked from ‘package:R.methodsS3’:
throw
The following objects are masked from ‘package:devtools’:
check, unload
The following objects are masked from ‘package:methods’:
getClasses, getMethods
The following objects are masked from ‘package:base’:
attach, detach, load, save
R.utils v2.11.0 (2021-09-26 08:30:02 UTC) successfully loaded. See ?R.utils for help.
Attaching package: ‘R.utils’
The following object is masked from ‘package:GEOquery’:
gunzip
The following object is masked from ‘package:tidyr’:
extract
The following object is masked from ‘package:utils’:
timestamp
The following objects are masked from ‘package:base’:
cat, commandArgs, getOption, inherits, isOpen, nullfile, parse, warnings
library(reshape2)
Attaching package: ‘reshape2’
The following object is masked from ‘package:tidyr’:
smiths
library(limma)
Attaching package: ‘limma’
The following object is masked from ‘package:BiocGenerics’:
plotMA
library(dplyr)
library(e1071)
Warning: package ‘e1071’ was built under R version 4.1.3
library(DT)
Warning: package ‘DT’ was built under R version 4.1.3
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
library(viridis)
Warning: package ‘viridis’ was built under R version 4.1.3
Loading required package: viridisLite
library(plotly)
Warning: package ‘plotly’ was built under R version 4.1.3
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
library(scales)
Attaching package: ‘scales’
The following object is masked from ‘package:viridis’:
viridis_pal
The following object is masked from ‘package:purrr’:
discard
The following object is masked from ‘package:readr’:
col_factor
library(CPOP) #devtools::install_github("sydneybiox/CPOP") #Will take a while to download and requires user input in console
library(matrixStats)
Warning: package ‘matrixStats’ was built under R version 4.1.3
Attaching package: ‘matrixStats’
The following objects are masked from ‘package:robustbase’:
colMedians, rowMedians
The following object is masked from ‘package:dplyr’:
count
The following objects are masked from ‘package:Biobase’:
anyMissing, rowMedians
# Download the three GEO series. getGEO() returns a list, so only the
# first element of each result is kept.
gse36059 <- getGEO("GSE36059")[[1L]]
gse48581 <- getGEO("GSE48581")[[1L]]
gse129166 <- getGEO("GSE129166")[[1L]]

# Feature (probe annotation) tables, one per series; each is printed for
# inspection right after it is created.
gse36059_f <- fData(gse36059)
gse36059_f
gse48581_f <- fData(gse48581)
gse48581_f
gse129166_f <- fData(gse129166)
gse129166_f

# Phenotype (per-sample metadata) tables; the diagnosis labels used for
# encoding later are read from their `characteristics_ch1*` columns.
gse36059_p <- pData(gse36059)
gse36059_p
gse48581_p <- pData(gse48581)
gse48581_p
gse129166_p <- pData(gse129166)
gse129166_p
# Diagnosis label encoding ----
#
# Adds two numeric columns to each phenotype table:
#   diagnosis       : 0 = non-rejecting (stable), 1 = rejecting
#   exact_diagnosis : 0 = non-rejecting, 1 = ABMR, 2 = TCMR, 3 = mixed
#
# The diagnosis string is read from `characteristics_ch1` for GSE36059 and
# from `characteristics_ch1.1` for GSE48581. GSE129166 has no single
# diagnosis string; it is derived from the TCMR score in
# `characteristics_ch1.1` and the ABMR flag in `characteristics_ch1.2`.

# Common prefix of every GSE48581 diagnosis label.
gse48581_prefix <- "diagnosis (tcmr, abmr, mixed, non-rejecting, nephrectomy): "

# Remove nephrectomy outcomes.
# `%in%` is used instead of `==` so that a missing label cannot produce an
# all-NA row when subsetting.
gse36059_p <- gse36059_p[
  !(gse36059_p$characteristics_ch1 %in% "diagnosis: Nephrectomy"),
]
# The GSE48581 diagnosis lives in `characteristics_ch1.1` (the column used
# for the encodings below), so the filter must test that column; testing
# `characteristics_ch1` would leave nephrectomy samples in the table and
# they would then be encoded as rejecting / mixed.
gse48581_p <- gse48581_p[
  !(gse48581_p$characteristics_ch1.1 %in%
      paste0(gse48581_prefix, "nephrectomy")),
]

# Per-dataset label vectors, taken after filtering so that their lengths
# match the filtered tables.
gse36059_dx <- gse36059_p$characteristics_ch1
gse48581_dx <- gse48581_p$characteristics_ch1.1

# GSE129166: a sample is free of TCMR / ABMR when the respective field is 0.
# Any non-zero TCMR value (including borderline) counts as TCMR.
gse129166_no_tcmr <-
  gse129166_p$characteristics_ch1.1 == "tcmr (no: 0_borderline:1_TCMR:2): 0"
gse129166_no_abmr <-
  gse129166_p$characteristics_ch1.2 == "abmr (no: 0_Yes:1): 0"

# Encodes stable as 0 and rejecting as 1.
gse36059_p$diagnosis <- ifelse(gse36059_dx == "diagnosis: non-rejecting", 0, 1)
gse48581_p$diagnosis <- ifelse(
  gse48581_dx == paste0(gse48581_prefix, "non-rejecting"), 0, 1
)
gse129166_p$diagnosis <- ifelse(gse129166_no_tcmr & gse129166_no_abmr, 0, 1)

# Encodes stable as 0, ABMR as 1, TCMR as 2, Mixed as 3.
# For the first two datasets any label that is not non-rejecting, ABMR or
# TCMR falls through to 3.
gse36059_p$exact_diagnosis <- ifelse(
  gse36059_dx == "diagnosis: non-rejecting", 0,
  ifelse(
    gse36059_dx == "diagnosis: ABMR", 1,
    ifelse(gse36059_dx == "diagnosis: TCMR", 2, 3)
  )
)
gse48581_p$exact_diagnosis <- ifelse(
  gse48581_dx == paste0(gse48581_prefix, "non-rejecting"), 0,
  ifelse(
    gse48581_dx == paste0(gse48581_prefix, "ABMR"), 1,
    ifelse(gse48581_dx == paste0(gse48581_prefix, "TCMR"), 2, 3)
  )
)
gse129166_p$exact_diagnosis <- ifelse(
  gse129166_no_tcmr & gse129166_no_abmr, 0,
  ifelse(
    !gse129166_no_tcmr & !gse129166_no_abmr, 3,
    ifelse(!gse129166_no_tcmr & gse129166_no_abmr, 2, 1)
  )
)

gse36059_p
gse48581_p
gse129166_p
The main diagnosis outcomes are non-rejecting, TCMR (acute T-cell–mediated rejection), ABMR (anti-donor antibody-mediated rejection), and mixed (both TCMR and ABMR). The data also contain nephrectomy samples.
# Expression values for GSE36059, transposed and converted to a data frame.
# data.frame() is called with its defaults, so check.names = TRUE applies
# and column names that are not syntactic R names are adjusted.
gse36059_ex <- data.frame(
  t(exprs(gse36059))
)
gse36059_ex